{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Guardar y deployar un modelo predictivo\n", "\n", "Para este ejemplo utilizaremos un árbol de decisión" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "import os\n", "import pandas as pd\n", "from xgboost.sklearn import XGBClassifier\n", "from sklearn.model_selection import train_test_split\n", "from sklearn.metrics import accuracy_score, confusion_matrix, precision_score, recall_score, f1_score" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
PregnanciesGlucoseBloodPressureSkinThicknessInsulinBMIDiabetesPedigreeFunctionAgeOutcome
061487235033.60.627501
11856629026.60.351310
28183640023.30.672321
318966239428.10.167210
40137403516843.12.288331
\n", "
" ], "text/plain": [ " Pregnancies Glucose BloodPressure SkinThickness Insulin BMI \\\n", "0 6 148 72 35 0 33.6 \n", "1 1 85 66 29 0 26.6 \n", "2 8 183 64 0 0 23.3 \n", "3 1 89 66 23 94 28.1 \n", "4 0 137 40 35 168 43.1 \n", "\n", " DiabetesPedigreeFunction Age Outcome \n", "0 0.627 50 1 \n", "1 0.351 31 0 \n", "2 0.672 32 1 \n", "3 0.167 21 0 \n", "4 2.288 33 1 " ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df = pd.read_csv(os.path.join('../Datasets/diabetes.csv'))\n", "df.head()" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "feature_cols = ['Pregnancies', 'Insulin', 'BMI', 'Age','Glucose','BloodPressure','DiabetesPedigreeFunction']\n", "X = df[feature_cols]\n", "Y = df['Outcome']\n", "X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3, random_state=1) # 70% training, 30% test" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Optimización de parámetros" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "#https://github.com/conda-forge/hyperopt-feedstock\n", "from hyperopt import fmin, tpe, hp, STATUS_OK,Trials" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [], "source": [ "space ={\n", " 'n_estimators':hp.quniform('n_estimators',100,1000,1), \n", " 'learning_rate':hp.quniform('learning_rate',0.025,0.5,0.025),\n", " 'max_depth':hp.quniform('max_depth',1,13,1),\n", " 'subsample': hp.quniform('subsample',0.5,1,0.05),\n", " 'colsample_bytree':hp.quniform('colsample_bytree',0.5,1,0.05),\n", " 'nthread':6,\n", " 'silent':1\n", "}" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [], "source": [ "def objective(params):\n", " params['n_estimators'] = int(params['n_estimators'])\n", " params['max_depth'] = int(params['max_depth']) \n", " classifier = XGBClassifier(**params)\n", " classifier.fit(X_train,Y_train) \n", " accuracy = accuracy_score(Y_test, classifier.predict(X_test))\n", " return {'loss': 1-accuracy, 'status': STATUS_OK}" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[17:15:14] WARNING: /Users/runner/work/xgboost/xgboost/src/learner.cc:576: \n", "Parameters: { \"silent\" } might not be used.\n", "\n", " This could be a false alarm, with some parameters getting used by language bindings but\n", " then being mistakenly passed down to XGBoost core, or some parameter actually being used\n", " but getting flagged wrongly here. Please open an issue if you find any such cases.\n", "\n", "\n", "[17:15:14] WARNING: /Users/runner/work/xgboost/xgboost/src/learner.cc:1115: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'binary:logistic' was changed from 'error' to 'logloss'. Explicitly set eval_metric if you'd like to restore the old behavior.\n", " 0%| | 0/20 [00:00" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Flask\n", "\n", "Flask es un **framework** minimalista escrito en Python que permite crear aplicaciones web rápidamente y con un mínimo número de líneas de código - **Wikipedia**.\n", "\n", "__[Flask](https://flask.palletsprojects.com/en/1.1.x/)__\n", "\n", "Ahora, utilizando el clasificador guardado anteriormente en un archivo binario, se creará un servicio API REST en Flask para poder utilizarlo. Para hacerlo funcionar hacerlo, colocar el código en un archivo .py y hacerlo correr en la consola." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# http://flask.palletsprojects.com/en/1.1.x/quickstart/#quickstart\n", "from flask import Flask, request, jsonify\n", "import os\n", "import pandas as pd\n", "import numpy as np\n", "from sklearn.model_selection import train_test_split\n", "from sklearn.metrics import accuracy_score\n", "import pickle\n", "\n", "app = Flask(__name__)\n", "\n", "classifier_filepath = os.path.join(\"tree_v6.pkl\")\n", "classifier_file = open(classifier_filepath, \"rb\")\n", "classifier = pickle.load(open(classifier_filepath, \"rb\"))\n", "classifier_file.close()\n", "\n", "# Desactiva el API /predict del clasificador.\n", "# retorna {\"message\": \"/predict disabled\"}, 200 OK\n", "@app.route('/disable', methods=['GET'])\n", "def disable():\n", " global ACTIVATED\n", " ACTIVATED = False\n", " return {'message': '/predict disabled'}, 200\n", "\n", "# Activa el API /predict del clasificador.\n", "# retorna {\"message\": \"/predict enabled\"}, 200 OK\n", "@app.route('/enable', methods=['GET'])\n", "def enable():\n", " global ACTIVATED\n", " ACTIVATED = True\n", " return {'message': '/predict enabled'}, 200\n", "\n", "# Entrena el modelo con los nuevos hyper-parámetros y retorna la nueva exactitud. Por ejemplo, {\"accuracy\": 0.81}, 200 OK\n", "# Se pueden enviar los siguiente hyper-parámetros: { \"n_estimators\": 10, \"criterion\": \"gini\", \"max_depth\": 7 }\n", "# \"criterion\" puede ser \"gini\" o \"entropy\", \"n_estimators\" y \"max_depth\" son un número entero positivo\n", "# Unicamente \"max_depth\" es opcional en cuyo caso se deberá emplear None. Si los otros hyper-parámetros no están presentes se retorna:\n", "# {\"message\": \"missing hyper-parameter\"}, 404 BAD REQUEST\n", "# Finalmente, sólo se puede ejecutar este endpoint después de ejecutar GET /disable. En otro caso retorna {\"message\": \"can not reset an enabled classifier\"}, 400 BAD REQUEST\n", "@app.route('/reset', methods=['POST'])\n", "def reset():\n", " if ACTIVATED:\n", " return {\"message\": \"can not reset an enabled classifier\"}, 400\n", " json_request = request.get_json(force=True)\n", " if 'criterion' not in json_request or 'n_estimators' not in json_request:\n", " return {\"message\": \"missing hyper-parameter\"}, 400\n", "\n", " classifier.n_estimators = json_request.get('n_estimators')\n", " classifier.criterion = json_request.get('criterion')\n", " classifier.max_depth = json_request.get('max_depth')\n", " \n", " df = pd.read_csv(os.path.join(\"diabetes.csv\"))\n", " feature_cols = ['Pregnancies', 'Insulin', 'BMI', 'Age',\n", " 'Glucose', 'BloodPressure', 'DiabetesPedigreeFunction']\n", " X = df[feature_cols]\n", " Y = df[\"Outcome\"]\n", " X_train, X_test, Y_train, Y_test = train_test_split(\n", " X, Y, test_size=0.3, random_state=1)\n", "\n", " classifier.fit(X_train, Y_train)\n", " return {'accuracy': accuracy_score(Y_test, classifier.predict(X_test))}, 200\n", "\n", "# Recibe una lista de observaciones y retorna la clasificación para cada una de ellas.\n", "# Los valores en cada observación se corresponden con la siguientes variables:\n", "#['Pregnancies', 'Insulin', 'BMI', 'Age', 'Glucose', 'BloodPressure', 'DiabetesPedigreeFunction']\n", "# Por ejemplo: para estas observaciones:\n", "# [\n", "#\t[7,135,26.0,51,136,74,0.647],\n", "#\t[9,175,34.2,36,112,82,0.260]\n", "# ]\n", "@app.route('/predict', methods=['POST'])\n", "def predict():\n", " if not ACTIVATED:\n", " return {\"message\": \"classifier is not enabled\"}, 400\n", " predict_request = request.get_json(force=True)\n", " predict_response = classifier.predict(predict_request)\n", " return {'cases': predict_request,\n", " 'diabetes': predict_response.tolist()}\n", "\n", "\n", "if __name__ == '__main__':\n", " app.run(port=8080, debug=True)" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.7" } }, "nbformat": 4, "nbformat_minor": 2 }